### TEST: glimmav1 dataset (MArrayLM) ###

library(Glimma)
library(limma)
library(GlimmaV2)

data(lymphomaRNAseq)
rnaseq <- lymphomaRNAseq

# add lane
groups <- data.frame(genotype=rnaseq$samples$group,
                     lane= as.character(c(rep(4,5),3,3)),
                     miscCont=c(rep(4000,5),300,250),
                     miscDisc=c("blue","red",rep("green",5)))

# add libsize
groups <- rnaseq$samples$group

# fit
design <- model.matrix(~0+groups)
contrasts <- cbind(Smchd1null.vs.WT=c(-1,1))

# convert raw counts to logCPM values by automatically extracting libsizes and normalisation factors from x
vm <- voomWithQualityWeights(rnaseq, design=design)
fit <- lmFit(vm, design=design)
fit <- contrasts.fit(fit, contrasts)
fit <- eBayes(fit)
dtFit <- decideTests(fit)
class(fit)
## [1] "MArrayLM"
## attr(,"package")
## [1] "limma"
# general plot
library("tidyverse")
## -- Attaching packages ----------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.2     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
formatCounts <- function(counts, anno)
{
  anno <- cbind(anno, sample=colnames(counts))
  counts %>%
    as_tibble() %>%
    mutate(gene = rownames(counts)) %>%
    pivot_longer(colnames(counts), names_to = "sample", values_to = "count") %>%
    left_join(anno)
  split_counts <- counts %>%
    as_tibble() %>%
    mutate(gene = rownames(counts)) %>%
    pivot_longer(colnames(counts), names_to = "sample", values_to = "count") %>%
    left_join(anno) %>%
    group_split(gene, .keep = FALSE)
  return(split_counts)  
}

x <- formatCounts(rnaseq$counts, as.data.frame(rnaseq$samples$group))
## Joining, by = "sample"
## Joining, by = "sample"
glimmaMA(fit, counts=rnaseq$counts, groups=rnaseq$samples$group)
# verify against limma
plotMD(fit, status=dtFit)

### TEST: rnaseq-123 dataset (MArrayLM) ###

library(edgeR, quietly=TRUE)
library(Mus.musculus, quietly=TRUE)
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
library(GlimmaV2, quietly=TRUE)

# load data
files <- c("GSM1545535_10_6_5_11.txt", "GSM1545536_9_6_5_11.txt", 
   "GSM1545538_purep53.txt", "GSM1545539_JMS8-2.txt", 
   "GSM1545540_JMS8-3.txt", "GSM1545541_JMS8-4.txt", 
   "GSM1545542_JMS8-5.txt", "GSM1545544_JMS9-P7c.txt", 
   "GSM1545545_JMS9-P8c.txt")
x <- readDGE(files, columns=c(1,3))

# add sample info
samplenames <- substring(colnames(x), 12, nchar(colnames(x)))
colnames(x) <- samplenames
group <- as.factor(c("LP", "ML", "Basal", "Basal", "ML", "LP", 
                     "Basal", "ML", "LP"))
x$samples$group <- group
lane <- as.factor(rep(c("L004","L006","L008"), c(3,4,2)))
x$samples$lane <- lane

# add gene info
geneid <- rownames(x)
genes <- select(Mus.musculus, keys=geneid, columns=c("SYMBOL", "TXCHROM"), 
                keytype="ENTREZID")
## 'select()' returned 1:many mapping between keys and columns
genes <- genes[!duplicated(genes$ENTREZID),]
x$genes <- genes

# transformations from raw scale
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)

# remove lowly expressed genes
keep.exprs <- filterByExpr(x, group=group)
x <- x[keep.exprs,, keep.lib.sizes=FALSE]

# normalising gene expression distributions
x <- calcNormFactors(x, method = "TMM")

# generate design
design <- model.matrix(~0+group+lane)
colnames(design) <- gsub("group", "", colnames(design))
contr.matrix <- makeContrasts(
   BasalvsLP = Basal-LP, 
   BasalvsML = Basal - ML, 
   LPvsML = LP - ML, 
   levels = colnames(design))
par(mfrow=c(1,2))
v <- voom(x, design)
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
tfit <- treat(vfit, lfc=1)
dt <- decideTests(tfit)
# test general plot (final column)
glimmaMA(tfit)
plotMD(tfit, status=dt[,3])

# test applying coef
glimmaMA(tfit, status=dt,  coef=2)
plotMD(tfit, column=2, status=dt[,2], main=colnames(tfit)[2])

# test status.colours
glimmaMA(tfit, status=dt, status.colours=c("#DD7373","#3B3561","#51A3A3"))
## TEST: DGEExact ##
d <- matrix(rnbinom(16,size=1,mu=10),4,4)
rownames(d) <- c("a","b","c","d")
colnames(d) <- c("A1","A2","B1","B2")
d <- DGEList(counts=d,group=factor(c("A","A","B","B")))
d <- estimateCommonDisp(d)
results <- exactTest(d)
class(results)
## [1] "DGEExact"
## attr(,"package")
## [1] "edgeR"
glimmaMA(results)
plotMD(results)

## TEST: DGELRT ##

x <- estimateDisp(x, design)
xGlm <- glmFit(x, design)
lrt <- glmLRT(xGlm)
lrt
## An object of class "DGELRT"
## $coefficients
##            Basal        LP         ML     laneL006   laneL008
## 497097 -12.02957 -17.35909 -17.051247  0.474294727 0.35230334
## 20671  -14.08464 -16.38004 -15.896113  0.892919287 1.76232448
## 27395  -10.73099 -11.07025 -10.765272 -0.102443892 0.03628641
## 18777  -10.20812 -10.40762  -9.941686  0.009848289 0.13404240
## 21399  -10.19313 -10.44680 -10.460501 -0.012359432 0.10733490
## 16619 more rows ...
## 
## $fitted.values
##         Samples
## Tags      10_6_5_11    9_6_5_11    purep53    JMS8-2      JMS8-3      JMS8-4
##   497097   0.790095    1.347819  355.68529  515.1586    4.620131    2.416674
##   20671    2.196067    4.412337   45.28392  100.7132   23.225142   10.314581
##   27395  457.439229  764.721838 1306.25491 1059.5105 1468.007553  783.563915
##   18777  887.463665 1742.665035 2203.56759 1999.7633 3742.946326 1700.851849
##   21399  853.361528 1037.238862 2236.84819 1985.3792 2178.879386 1599.571632
##         Samples
## Tags        JMS8-5    JMS9-P7c    JMS9-P8c
##   497097  526.2100    1.230457   0.7541756
##   20671   102.8737   16.786348   8.7355745
##   27395  1082.2396  506.716744 316.9227837
##   18777  2042.6630 1273.301475 677.9945432
##   21399  2027.9703  737.899631 634.7604596
## 16619 more rows ...
## 
## $deviance
## [1] 2.885355 6.802933 3.951154 3.048092 1.714055
## 16619 more elements ...
## 
## $iter
## [1] 9 9 4 5 4
## 16619 more elements ...
## 
## $failed
## [1] FALSE FALSE FALSE FALSE FALSE
## 16619 more elements ...
## 
## $method
## [1] "levenberg"
## 
## $unshrunk.coefficients
##            Basal        LP         ML     laneL006   laneL008
## 497097 -12.03199 -17.43168 -17.106427  0.477327156 0.35675279
## 20671  -14.09308 -16.40941 -15.920510  0.906223262 1.78401646
## 27395  -10.73111 -11.07043 -10.765403 -0.102458543 0.03629495
## 18777  -10.20820 -10.40771  -9.941744  0.009849778 0.13405250
## 21399  -10.19321 -10.44689 -10.460597 -0.012359281 0.10734525
## 16619 more rows ...
## 
## $df.residual
## [1] 4 4 4 4 4
## 16619 more elements ...
## 
## $design
##   Basal LP ML laneL006 laneL008
## 1     0  1  0        0        0
## 2     0  0  1        0        0
## 3     1  0  0        0        0
## 4     1  0  0        1        0
## 5     0  0  1        1        0
## 6     0  1  0        1        0
## 7     1  0  0        1        0
## 8     0  0  1        0        1
## 9     0  1  0        0        1
## attr(,"assign")
## [1] 1 1 1 2 2
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
## 
## attr(,"contrasts")$lane
## [1] "contr.treatment"
## 
## 
## $offset
##          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## [1,] 17.19608 17.40491 17.90603 17.79913 18.15952 17.83674 17.82036 16.95706
##         [,9]
## [1,] 16.7928
## attr(,"class")
## [1] "CompressedMatrix"
## attr(,"Dims")
## [1] 5 9
## attr(,"repeat.row")
## [1] TRUE
## attr(,"repeat.col")
## [1] FALSE
## 16619 more rows ...
## 
## $dispersion
## [1] 0.04420639 0.27384159 0.03127786 0.02062430 0.01667028
## 16619 more elements ...
## 
## $prior.count
## [1] 0.125
## 
## $samples
##                              files group lib.size norm.factors lane
## 10_6_5_11 GSM1545535_10_6_5_11.txt    LP 32857304    0.8943956 L004
## 9_6_5_11   GSM1545536_9_6_5_11.txt    ML 35328624    1.0250186 L004
## purep53     GSM1545538_purep53.txt Basal 57147943    1.0459005 L004
## JMS8-2       GSM1545539_JMS8-2.txt Basal 51356800    1.0458455 L006
## JMS8-3       GSM1545540_JMS8-3.txt    ML 75782871    1.0162707 L006
## JMS8-4       GSM1545541_JMS8-4.txt    LP 60506774    0.9217132 L006
## JMS8-5       GSM1545542_JMS8-5.txt Basal 55073018    0.9961959 L006
## JMS9-P7c   GSM1545544_JMS9-P7c.txt    ML 21305254    1.0861026 L008
## JMS9-P8c   GSM1545545_JMS9-P8c.txt    LP 19955335    0.9839203 L008
## 
## $genes
##   ENTREZID SYMBOL TXCHROM
## 1   497097   Xkr4    chr1
## 5    20671  Sox17    chr1
## 6    27395 Mrpl15    chr1
## 7    18777 Lypla1    chr1
## 9    21399  Tcea1    chr1
## 16619 more rows ...
## 
## $prior.df
## [1] 4.212451
## 
## $AveLogCPM
## [1]  1.6038865 -0.4788595  4.2358837  5.3160202  5.0130895
## 16619 more elements ...
## 
## $table
##             logFC     logCPM         LR     PValue
## 497097 0.50826629  1.6038865 0.17475701 0.67591825
## 20671  2.54249679 -0.4788595 7.85701514 0.00506239
## 27395  0.05235023  4.2358837 0.04353588 0.83471950
## 18777  0.19338230  5.3160202 0.91075337 0.33991461
## 21399  0.15485153  5.0130895 0.70779003 0.40017845
## 16619 more rows ...
## 
## $comparison
## [1] "laneL008"
## 
## $df.test
## [1] 1 1 1 1 1
## 16619 more elements ...
glimmaMA(lrt)
### Test: DESeqDataset ###

library("pasilla", quietly=TRUE)
library("DESeq2", quietly=TRUE)
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library(GlimmaV2)
pasCts <- system.file("extdata",
                      "pasilla_gene_counts.tsv",
                      package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata",
                       "pasilla_sample_annotation.csv",
                       package="pasilla", mustWork=TRUE)
cts <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
coldata <- read.csv(pasAnno, row.names=1)
coldata <- coldata[,c("condition","type")]
coldata$condition <- factor(coldata$condition)
coldata$type <- factor(coldata$type)

rownames(coldata) <- sub("fb", "", rownames(coldata))
cts <- cts[, rownames(coldata)]

dds <- DESeqDataSetFromMatrix(countData = cts,
                              colData = coldata,
                              design = ~ condition)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
glimmaMA(dds)